library(twitteR)
## Warning: package 'twitteR' was built under R version 4.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
## 
##     lookup_statuses
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(RColorBrewer)
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Twitter API credentials are read from environment variables (for example
# defined in ~/.Renviron) so that secrets never live in the analysis source.
# Any keys that were previously hard-coded in this file should be treated as
# leaked and regenerated in the Twitter developer portal.
CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Sys.getenv() returns "" for unset variables; fail early with a clear
# message rather than letting authentication fail later.
stopifnot(
  "Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET before running" =
    all(nzchar(c(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_SECRET)))
)

# Register the credentials with twitteR for the rest of the session.
setup_twitter_oauth(
  consumer_key = CONSUMER_KEY,
  consumer_secret = CONSUMER_SECRET,
  access_token = ACCESS_TOKEN,
  access_secret = ACCESS_SECRET
)
## [1] "Using direct authentication"

Extract 10,000 tweets (including retweets) from Twitter using the twitteR package.

# Search for English-language tweets matching "NBA" between the `since` and
# `until` dates, requesting up to 10000 results.
# NOTE(review): retryOnRateLimit = 120 is presumably the number of retries
# when the API rate limit is hit -- confirm against the twitteR docs.
NBAtweets <- searchTwitter(
  "NBA",
  n = 10000,
  lang = "en",
  since = "2022-12-05",
  until = "2022-12-11",
  retryOnRateLimit = 120
)

Convert into Data Frame

# Flatten the list of status objects returned by the search into a data frame.
NBAdf <- twListToDF(NBAtweets)

# Preview the first five tweet texts. Asking head() for five directly avoids
# taking six and re-indexing, which pads with NA when fewer than five exist.
head(NBAdf$text, 5)
## [1] "@EBomb_NBA I tend to agree.\n\nIDK who they could get. I'd want a win now trade.\n\nSomething like KAT for OG plus"                    
## [2] "RT @NBA: An updated look at the NBA Standings 👀\n\nFor more, download the NBA App\n🏀 https://t.co/6FlAli0aPP https://t.co/o7BJTmCmCS"
## [3] "@NBA better than curry"                                                                                                                
## [4] "Pistons news: Jalen Duren and some big problems for Detroit ⁦@NBA⁩  https://t.co/qmxi9uHewa"                                             
## [5] "RT @NBA: Big DUNKS from LUKA &amp; ZION headline Friday's TOP 10 PLAYS! https://t.co/wOKl1KIiTH"

Saving and loading the Data Frame

# Persist the full tweet data frame to disk ...
save(NBAdf, file = "NBAdf.Rdata")
# ... and restore it into the workspace from that same file.
load("NBAdf.Rdata")

---- Original Tweets ----

Subsetting original tweets

# Keep only the columns used below and drop every row flagged as a retweet.
tweetsNBA <- NBAdf %>%
  select(screenName, text, created, isRetweet) %>%
  filter(!isRetweet)

Saving and loading Original Tweets

# Persist the original (non-retweet) tweets to disk ...
save(tweetsNBA, file = "Original_Tweetsdf.Rdata")
# ... and restore them into the workspace from that same file.
load("Original_Tweetsdf.Rdata")

Finding the earliest and latest creation times

# Latest and earliest creation timestamps of the original tweets.
# group_by(1) groups on the constant 1, so the result is a single row with an
# extra column named `1`.
summarise(
  group_by(tweetsNBA, 1),
  max = max(created),
  min = min(created)
)
## # A tibble: 1 × 3
##     `1` max                 min                
##   <dbl> <dttm>              <dttm>             
## 1     1 2022-12-10 23:59:59 2022-12-10 21:21:02
# Add each tweet's creation time rounded to the nearest hour. round() on a
# date-time returns POSIXlt, so convert back to POSIXct for use as a column.
data1 <- mutate(
  tweetsNBA,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)

# Earliest creation time among the original tweets; printed on the next line.
mn <- min(pull(tweetsNBA, created))
mn
## [1] "2022-12-10 21:21:02 UTC"
# Latest creation time among the original tweets; printed on the next line.
mx <- max(pull(tweetsNBA, created))
mx
## [1] "2022-12-10 23:59:59 UTC"

Plot of original tweets over time.

# Histogram of original tweets over time, with bars coloured by their count.
# after_stat(count) replaces the `..count..` notation deprecated in
# ggplot2 3.4.0. Created_At_Round is rounded to the hour, so a one-hour bin
# width (3600 seconds on a POSIXct axis) draws one bar per hour instead of
# spreading the data over the default 30 bins.
Orig_plot <- ggplot(data1, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), binwidth = 3600) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of Tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Render the ggplot as an interactive plotly widget.
Orig_plot %>% ggplotly()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## ℹ The deprecated feature was likely used in the ggplot2 package.
##   Please report the issue at <https://github.com/tidyverse/ggplot2/issues>.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

---- Retweets ----

Subsetting retweets

# Keep only the columns used below and retain just the rows flagged as
# retweets.
NBA_retweets <- NBAdf %>%
  select(screenName, text, created, isRetweet) %>%
  filter(isRetweet)

Saving and loading ReTweets

# Persist the retweets to disk ...
save(NBA_retweets, file = "ReTweetsdf.Rdata")
# ... and restore them into the workspace from that same file.
load("ReTweetsdf.Rdata")

Finding the earliest and latest creation times

# Latest and earliest creation timestamps of the retweets.
# group_by(1) groups on the constant 1, so the result is a single row with an
# extra column named `1`.
summarise(
  group_by(NBA_retweets, 1),
  max = max(created),
  min = min(created)
)
## # A tibble: 1 × 3
##     `1` max                 min                
##   <dbl> <dttm>              <dttm>             
## 1     1 2022-12-10 23:59:59 2022-12-10 21:21:03
# Add each retweet's creation time rounded to the nearest hour. round() on a
# date-time returns POSIXlt, so convert back to POSIXct for use as a column.
data2 <- mutate(
  NBA_retweets,
  Created_At_Round = as.POSIXct(round(created, units = "hours"))
)

# Earliest creation time among the retweets; printed on the next line.
mn <- min(pull(NBA_retweets, created))
mn
## [1] "2022-12-10 21:21:03 UTC"
# Latest creation time among the retweets; printed on the next line.
mx <- max(pull(NBA_retweets, created))
mx
## [1] "2022-12-10 23:59:59 UTC"

Plot of retweets over time.

# Histogram of retweets over time, with bars coloured by their count.
# after_stat(count) replaces the `..count..` notation deprecated in
# ggplot2 3.4.0. Created_At_Round is rounded to the hour, so a one-hour bin
# width (3600 seconds on a POSIXct axis) draws one bar per hour instead of
# spreading the data over the default 30 bins.
reTweet_plot <- ggplot(data2, aes(x = Created_At_Round)) +
  geom_histogram(aes(fill = after_stat(count)), binwidth = 3600) +
  theme(legend.position = "right") +
  xlab("Time") +
  ylab("Number of ReTweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Render the ggplot as an interactive plotly widget.
reTweet_plot %>% ggplotly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.